/*==================================================
project:       Cleans up SISBEN for use
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    May 2023
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

/*==================================================
			0: Program set up
==================================================*/
*Written on STATA 17
drop _all
set varabbrev off	// no variable abbreviations allowed (personal preference)

** source dir
cd "${dir5r}" // graph dir

/*==================================================
			1: load and transformations
==================================================*/

*----------- 1.0.1 Load CRRA welfare FUN
do "${dir6r}/ados/bstrap_pmt.ado" // load bstap FUN


*Globals with updted survey variables
glo hh_char_srv urban age_feb20_srv prop_kids_srv n_members_srv elementary_srv highschool_srv tertiary_edu_srv children_srv living_couple
glo assets_srv own_washing_machine_srv own_tractor_srv own_moto_srv own_car_srv own_PC_srv own_fridge_srv
glo hh_srv  wall_finished_srv floor_finished_srv cookpower_connected_srv wc_flush_srv kitchen_srv electricity_srv running_water_srv trash_service_srv 
glo labour_srv formal_work_srv informal_work_srv
glo pmt_1_srv  $hh_char_srv $assets_srv $hh_srv $labour_srv

*----------  1.1. Data prep:
use "${dir3r}/01_survey/survey_targeting_r1_long.dta", clear

*----------  1.1.2 Estimate PMT for survey data:
	set seed  `=339487731'
	
	// An uniform have 50% probably of being > 0.5, let's use that for the split
	tempvar _sample_srv
	tempvar _random 
	tempvar _aux
	gen  `_random' = runiform() if year == 2019
	bys id_hogar_SISBEN : egen `_aux' = max(`_random')
	replace `_random' = `_aux'
	gen `_sample_srv' = 1
	replace `_sample_srv' = 2 if `_random' > 0.5
	lab var `_sample_srv' "Training or testing sample"
	lab define lsmaple 1 "Training" 2 "Testing", replace
	lab values `_sample_srv' lsmaple
	drop `_random' `_aux'
	
	reg l_pp_inc_srv $pmt_1_srv [aw=pondera] if `_sample_srv'==1 & year==2019

	
	foreach var in `predvars' {
		tempvar aux
		gen  `_aux'=`var' if year==2019
		tempvar maux
		egen `maux'=mean(`_aux'), by(id_hogar_SISBEN)
		replace `var'=`maux'
		drop `maux' `_aux'
	}
	
	predict pred_income01_srv if `_sample_srv' == 2  , xb

*----------- 1.3 Define budget
*scalar prog_budget_colombia = 3000000 * 160000
scalar prog_budget_colombia = 1997181822314 / 12  // Transfer expenses from 
scalar nfamilies_colombia = 50372424 / 2.9 // inhabitans by average family size
scalar	ph_budget = prog_budget_colombia / nfamilies_colombia



*------------ poverty line
scalar epovline = 137350
scalar l_epovline = log(epovline)
local base=1.90*1515.1
// 2019 PPP factor https://data.worldbank.org/indicator/PA.NUS.PRVT.PP?locations=CO 
/*==================================================
			2: Panel A
==================================================*/ 

*---------- 1.4. Prediction


//>>>>>>>>>>>>> 1.4.2 Actual prediction

*----------- 2.1 Estimation

// define targeted people that should be actully covered
cap drop targeted
gen targeted = l_pp_inc_srv < l_epovline
replace targeted = . if l_pp_inc_srv == .

gen covered_epov = (pred_income01_srv  < l_epovline)
replace covered_epov = . if missing(pred_income01_srv)
gen inclusion_err_epov = (covered_epov == 1) if targeted == 0 & !missing(pred_income01_srv )
gen exclusion_err_epov = (covered_epov == 0) if targeted == 1 & !missing(pred_income01_srv )
gen I_exclusion=1-exclusion_err_epov
keep if _sample_srv==2

sum inclusion_err_epov [aw=pondera]
local inc=r(mean)

sum I_exclusion [aw=pondera]
local ier=r(mean)

* Estimation 
loc i = 0
forv y = 2019 / 2020 {

preserve

keep if year == `y'
*keep if year == 2019

* Rank households by predicted income
cap drop running 
cap drop ranking_pmt
drop if missing(income_pp_nt)
xtile ranking_pmt = pred_income01_srv if !missing(income_pp_nt), n(60)
sum ranking_pmt
sca ntop = r(max)
sca nbot = r(min)

qui {
	forv k = 0 / `=ntop' {
	loc ++i
	// define covered based on PMT income
	cap drop covered 
	gen covered = (ranking_pmt <= `k')
	replace covered = . if missing(pred_income01_srv)
	
	count if covered == 1 & !missing(pred_income01_srv) & !missing(income_pp_nt)
	sca base = r(N)
	
	// inclusion and exclusion error 
	cap drop inclusion_err
	cap drop exclusion_err
	gen inclusion_err = (covered == 1) if targeted == 0 & !missing(pred_income01_srv)
	gen exclusion_err = (covered == 0) if targeted == 1 & !missing(pred_income01_srv)

	qui count if inclusion_err == 1
	loc n_incerr = r(N)
	
	qui count if exclusion_err == 1
	loc n_excerr = r(N)
	
	qui sum inclusion_err /*[aw=pondera] */ if !missing(pred_income01_srv) & !missing(income_pp_nt)
	loc p_incerr = r(mean)
	qui sum exclusion_err /*[aw=pondera] */ if !missing(pred_income01_srv) & !missing(income_pp_nt)
	loc p_excerr = r(mean)


	// PP benefit subject to budget 
		* update budget 
	count if !missing(pred_income01_srv) & !missing(income_pp_nt)
	scalar ssize = r(N)
	scalar budget = ph_budget * ssize

	cap drop hh_benefit
	gen hh_benefit = budget / base
	*local hh_benefit=hh_benefit
	if (base == 0 | `k'==0 )	replace	hh_benefit = 0 
	sca hh_benefits = budget / base 
	
	cap drop pp_benefits_c
	gen pp_benefits_c =0  
	replace pp_benefits_c = hh_benefit / n_members if covered==1 & !missing(pred_income01_srv) & !missing(income_pp_nt)

	// CRRA

	scalar rho=3
	local base=1.90*1515.1
	gen income_hh = `base' + income_pp_nt + pp_benefits_c if !missing(pred_income01_srv) & year == `y'  
	gen crra = (((income_hh)^(1-rho))*2)/(1-rho) if !missing(pred_income01_srv) 
	sum crra /*[aw = pondera]*/ if year == `y'
	sca totalU = r(sum)
	drop crra income_hh

	noi di "`y' - pred: `k' : `p_incerr' `p_excerr' `hh_benefit' "
	
	// Store results in matrix
	mat estt = J(1,9,.)
	mat estt[1,1] = `y'
	mat estt[1,2] = `k'
	mat estt[1,3] = `p_incerr'
	mat estt[1,4] = `p_excerr'
	mat estt[1,5] = base
	mat estt[1,6] = `n_incerr'
	mat estt[1,7] = `n_excerr'
	*mat estt[1,8] =	`ud'
	mat estt[1,8] =	totalU
	mat estt[1,9] =	hh_benefits
	
	// add results to final matrix 
	if (`i' == 1)		mat dat = estt
	else 				mat dat = dat \ estt
	}
} // end quietly
	
restore 
} // end of year loop 


*----------- 2.2 Plotting

cap frame create results
frame change results

drop _all

svmat double dat

rename dat1  year
rename dat2  sisben_subgrupo
rename dat3  p_incerr
rename dat4  p_excerr
rename dat5  base
rename dat6  n_incerr
rename dat7  n_excerr
rename dat8  crra
rename dat9  hh_benefits

replace hh_benefits = hh_benefits / 1515.1 // 2019 PPP factor https://data.worldbank.org/indicator/PA.NUS.PRVT.PP?locations=CO

gen truepos = 1 - p_excerr
order truepos , before(base)


** ROC 
sort p_incerr
#delimit ;
tw (line truepos p_incerr if year == 2019, lpattern(shortdash) lwidth(mthick)) 
(function y = x, ra(truepos) clpat(.-.)) ,
graphregion(color(white)) bgcolor(white)
xtitle("Inclusion Error") xline(`inc')
ytitle("1 - Exclusion Error") 
xlab(0(0.1)1) 
name(A, replace)
legend(off)
;
#delimit cr

graph export "FA1_ROC_curve.png", as(png) replace
graph export "FA1_ROC_curve.pdf", as(pdf) replace


/* End of do-file */


















